{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def _read_lines(path):\n",
    "    \"\"\"Return every line of the text file at `path`, line endings removed.\"\"\"\n",
    "    with open(path, 'r') as handle:\n",
    "        return handle.read().splitlines()\n",
    "\n",
    "\n",
    "# Image-name lists for the three splits, plus the raw caption records.\n",
    "train_imgs = _read_lines('../Flickr8k_text/Flickr_8k.trainImages.txt')\n",
    "dev_imgs = _read_lines('../Flickr8k_text/Flickr_8k.devImages.txt')\n",
    "test_imgs = _read_lines('../Flickr8k_text/Flickr_8k.testImages.txt')\n",
    "captions = _read_lines('../Flickr8k_text/Flickr8k.token.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Fold the dev split into the training split. Names already present are\n",
    "# skipped so that re-running this cell does not append the dev images again.\n",
    "names_in_train = set(train_imgs)\n",
    "train_imgs = train_imgs + [img for img in dev_imgs if img not in names_in_train]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "\n",
    "# image file name -> list of caption strings, in the order they appear in `captions`\n",
    "caption_map = defaultdict(list)\n",
    "\n",
    "for record in captions:\n",
    "    # A blank line (e.g. at the end of the file) has no fields to parse.\n",
    "    if not record.strip():\n",
    "        continue\n",
    "    # Split on the first tab only, so a tab inside the caption text is kept\n",
    "    # instead of silently cutting the caption short.\n",
    "    img_id, img_caption = record.split('\\t', 1)\n",
    "    # Drop the two-character suffix after the file name\n",
    "    # (presumably '#<caption index>' -- confirm against the token file).\n",
    "    img_name = img_id[:-2]\n",
    "    caption_map[img_name].append(img_caption.strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3.4/importlib/_bootstrap.py:321: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  return f(*args, **kwds)\n",
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from keras.preprocessing import image\n",
    "from keras.applications.vgg16 import preprocess_input as preprocess_vgg16_input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def process_image2arr(path, img_dims=(224, 224)):\n",
    "    \"\"\"Load the image at `path` and prepare it as a single-image batch for VGG16.\n",
    "\n",
    "    The image is resized to `img_dims` on load, converted to an array, given a\n",
    "    leading batch axis of length 1 and passed through Keras' VGG16\n",
    "    `preprocess_input`, whose result is returned.\n",
    "    \"\"\"\n",
    "    pil_img = image.load_img(path, target_size=img_dims)\n",
    "    batch = image.img_to_array(pil_img)[np.newaxis, ...]\n",
    "    return preprocess_vgg16_input(batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from keras.applications import vgg16\n",
    "from keras.models import Model\n",
    "\n",
    "\n",
    "# Full ImageNet-pretrained VGG16, classifier head included.\n",
    "base_model = vgg16.VGG16(include_top=True, weights='imagenet',\n",
    "                         input_shape=(224, 224, 3))\n",
    "\n",
    "# Use the second-to-last layer as the feature output. Indexing it directly,\n",
    "# rather than calling `layers.pop()` and then taking `layers[-1]`, also works\n",
    "# in Keras versions where `Model.layers` returns a fresh list: there the pop\n",
    "# would be a no-op and `layers[-1]` would still be the final classifier layer.\n",
    "output = base_model.layers[-2].output\n",
    "\n",
    "# Feature extractor: same input as VGG16, cut off after that layer, frozen.\n",
    "vgg_model = Model(base_model.input, output)\n",
    "vgg_model.trainable = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "input_1 (InputLayer)         (None, 224, 224, 3)       0         \n",
      "_________________________________________________________________\n",
      "block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      \n",
      "_________________________________________________________________\n",
      "block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     \n",
      "_________________________________________________________________\n",
      "block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         \n",
      "_________________________________________________________________\n",
      "block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     \n",
      "_________________________________________________________________\n",
      "block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    \n",
      "_________________________________________________________________\n",
      "block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         \n",
      "_________________________________________________________________\n",
      "block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    \n",
      "_________________________________________________________________\n",
      "block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    \n",
      "_________________________________________________________________\n",
      "block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    \n",
      "_________________________________________________________________\n",
      "block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         \n",
      "_________________________________________________________________\n",
      "block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   \n",
      "_________________________________________________________________\n",
      "block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   \n",
      "_________________________________________________________________\n",
      "block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   \n",
      "_________________________________________________________________\n",
      "block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         \n",
      "_________________________________________________________________\n",
      "block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   \n",
      "_________________________________________________________________\n",
      "block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   \n",
      "_________________________________________________________________\n",
      "block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   \n",
      "_________________________________________________________________\n",
      "block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         \n",
      "_________________________________________________________________\n",
      "flatten (Flatten)            (None, 25088)             0         \n",
      "_________________________________________________________________\n",
      "fc1 (Dense)                  (None, 4096)              102764544 \n",
      "_________________________________________________________________\n",
      "fc2 (Dense)                  (None, 4096)              16781312  \n",
      "=================================================================\n",
      "Total params: 134,260,544\n",
      "Trainable params: 0\n",
      "Non-trainable params: 134,260,544\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# Print the layer-by-layer summary of the truncated, frozen model.\n",
    "vgg_model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "\n",
    "def extract_tl_features_vgg(model, image_file_name, image_dir='../Flickr8k_imgs/'):\n",
    "    \"\"\"Return the feature vector `model` predicts for one image file.\n",
    "\n",
    "    model           -- model whose `predict` is called on the preprocessed image\n",
    "    image_file_name -- file name of the image, relative to `image_dir`\n",
    "    image_dir       -- directory holding the images; a trailing separator is optional\n",
    "\n",
    "    The prediction for the single-image batch is returned as a 1-D array whose\n",
    "    length is the size of the model output's second axis.\n",
    "    \"\"\"\n",
    "    # os.path.join copes with `image_dir` given with or without a trailing slash;\n",
    "    # plain string concatenation would glue the names together in the latter case.\n",
    "    pr_img = process_image2arr(os.path.join(image_dir, image_file_name))\n",
    "    tl_features = model.predict(pr_img)\n",
    "    # Drop the batch axis (length 1): (1, n) -> (n,)\n",
    "    return np.reshape(tl_features, tl_features.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Feature vectors keyed by image file name.\n",
    "img_tl_featureset = {}\n",
    "\n",
    "# Parallel (image name, caption) lists for each split, one entry per caption.\n",
    "train_img_names, train_img_captions = [], []\n",
    "test_img_names, test_img_captions = [], []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train Dataset Size: 35000 \tTest Dataset Size: 5000\n"
     ]
    }
   ],
   "source": [
    "def _collect_split(img_names):\n",
    "    \"\"\"Extract features for every image of a split and pair images with captions.\n",
    "\n",
    "    Stores each image's feature vector in `img_tl_featureset` and returns two\n",
    "    parallel lists (image names, captions) holding one entry per caption found\n",
    "    in `caption_map` for that image.\n",
    "    \"\"\"\n",
    "    names, texts = [], []\n",
    "    for img in img_names:\n",
    "        img_tl_featureset[img] = extract_tl_features_vgg(model=vgg_model, image_file_name=img)\n",
    "        for caption in caption_map[img]:\n",
    "            names.append(img)\n",
    "            texts.append(caption)\n",
    "    return names, texts\n",
    "\n",
    "\n",
    "# The lists are rebuilt from scratch on every run, so re-executing this cell\n",
    "# cannot duplicate rows the way appending to lists from another cell would.\n",
    "train_img_names, train_img_captions = _collect_split(train_imgs)\n",
    "test_img_names, test_img_captions = _collect_split(test_imgs)\n",
    "\n",
    "train_dataset = pd.DataFrame({'image': train_img_names, 'caption': train_img_captions})\n",
    "test_dataset = pd.DataFrame({'image': test_img_names, 'caption': test_img_captions})\n",
    "print('Train Dataset Size:', len(train_dataset), '\\tTest Dataset Size:', len(test_dataset))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caption</th>\n",
       "      <th>image</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A black dog is running after a white dog in th...</td>\n",
       "      <td>2513260012_03d33305cf.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Black dog chasing brown dog through snow</td>\n",
       "      <td>2513260012_03d33305cf.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Two dogs chase each other across the snowy gro...</td>\n",
       "      <td>2513260012_03d33305cf.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Two dogs play together in the snow .</td>\n",
       "      <td>2513260012_03d33305cf.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Two dogs running through a low lying body of w...</td>\n",
       "      <td>2513260012_03d33305cf.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>A little baby plays croquet .</td>\n",
       "      <td>2903617548_d3e38d7f88.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>A little girl plays croquet next to a truck .</td>\n",
       "      <td>2903617548_d3e38d7f88.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>The child is playing croquette by the truck .</td>\n",
       "      <td>2903617548_d3e38d7f88.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>The kid is in front of a car with a put and a ...</td>\n",
       "      <td>2903617548_d3e38d7f88.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>The little boy is playing with a croquet hamme...</td>\n",
       "      <td>2903617548_d3e38d7f88.jpg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             caption  \\\n",
       "0  A black dog is running after a white dog in th...   \n",
       "1           Black dog chasing brown dog through snow   \n",
       "2  Two dogs chase each other across the snowy gro...   \n",
       "3               Two dogs play together in the snow .   \n",
       "4  Two dogs running through a low lying body of w...   \n",
       "5                      A little baby plays croquet .   \n",
       "6      A little girl plays croquet next to a truck .   \n",
       "7      The child is playing croquette by the truck .   \n",
       "8  The kid is in front of a car with a put and a ...   \n",
       "9  The little boy is playing with a croquet hamme...   \n",
       "\n",
       "                       image  \n",
       "0  2513260012_03d33305cf.jpg  \n",
       "1  2513260012_03d33305cf.jpg  \n",
       "2  2513260012_03d33305cf.jpg  \n",
       "3  2513260012_03d33305cf.jpg  \n",
       "4  2513260012_03d33305cf.jpg  \n",
       "5  2903617548_d3e38d7f88.jpg  \n",
       "6  2903617548_d3e38d7f88.jpg  \n",
       "7  2903617548_d3e38d7f88.jpg  \n",
       "8  2903617548_d3e38d7f88.jpg  \n",
       "9  2903617548_d3e38d7f88.jpg  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first ten rows of the training table.\n",
    "train_dataset.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Put the columns in a fixed (image, caption) order before writing.\n",
    "column_order = ['image', 'caption']\n",
    "train_dataset = train_dataset[column_order]\n",
    "test_dataset = test_dataset[column_order]\n",
    "\n",
    "# One tab-separated file per split, without the index column.\n",
    "for frame, tsv_path in ((train_dataset, 'image_train_dataset.tsv'),\n",
    "                        (test_dataset, 'image_test_dataset.tsv')):\n",
    "    frame.to_csv(tsv_path, sep='\\t', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['transfer_learn_img_features.pkl']"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# `sklearn.externals.joblib` is deprecated and absent from newer scikit-learn\n",
    "# releases; prefer the standalone package and fall back to the bundled copy\n",
    "# on old installs.\n",
    "try:\n",
    "    import joblib\n",
    "except ImportError:\n",
    "    from sklearn.externals import joblib\n",
    "\n",
    "# Persist the image-name -> feature-vector dict to disk.\n",
    "joblib.dump(img_tl_featureset, 'transfer_learn_img_features.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('3079787482_0757e9d167.jpg', (4096,)),\n",
       " ('3284955091_59317073f0.jpg', (4096,)),\n",
       " ('1795151944_d69b82f942.jpg', (4096,)),\n",
       " ('3532192208_64b069d05d.jpg', (4096,)),\n",
       " ('454709143_9c513f095c.jpg', (4096,))]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at five entries: image name and the shape of its feature vector.\n",
    "[(img_name, features.shape) for img_name, features in list(img_tl_featureset.items())[:5]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('3079787482_0757e9d167.jpg',\n",
       "  array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)),\n",
       " ('3284955091_59317073f0.jpg',\n",
       "  array([0.615, 0.   , 0.653, ..., 0.   , 1.559, 2.614], dtype=float32)),\n",
       " ('1795151944_d69b82f942.jpg',\n",
       "  array([0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.538], dtype=float32)),\n",
       " ('3532192208_64b069d05d.jpg',\n",
       "  array([0.   , 0.   , 0.   , ..., 0.   , 0.   , 2.293], dtype=float32)),\n",
       " ('454709143_9c513f095c.jpg',\n",
       "  array([0.   , 0.   , 0.131, ..., 0.833, 4.263, 0.   ], dtype=float32))]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at five entries with the feature values rounded to 3 decimals.\n",
    "[(img_name, np.round(features, 3)) for img_name, features in list(img_tl_featureset.items())[:5]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "35000"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the training table back from its TSV file and show its row count.\n",
    "train_df = pd.read_csv('image_train_dataset.tsv', sep='\\t')\n",
    "total_samples = len(train_df)\n",
    "total_samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
